\documentclass[a4paper,10pt]{article}

\usepackage[utf8]{inputenc}
\usepackage{amsmath}

\title{Yet another derivation of the KLT update rule}
\author{}


\begin{document}
\maketitle

We want to minimize the following function with respect to the displacement $d = (d_x, d_y)^T$,
\begin{equation}
 E(d) = \sum_{(x,y)\in W} (I(x,y) - J(x+d_x,y+d_y))^2 \ .
\end{equation}
We will do this iteratively.

Assume that we have a previous guess $\tilde d$ of the displacement, and we only want to compute the update $\delta d$ such that
\begin{equation}
 d = \tilde d + \delta d \ .
\end{equation}
Using the notation
\begin{equation}
 \tilde J(x,y) = J(x + \tilde d_x, y + \tilde d_y) \ ,
\end{equation}
the functional to be optimized at each iteration is
\begin{equation}
 E(\delta d) = \sum_{(x,y)\in W} (I(x,y) - \tilde J(x + \delta d_x, y + \delta d_y))^2 \ .
\end{equation}
The first-order approximation of $\tilde J$ with respect to $\delta d$ is
\begin{equation}
\begin{split}
 \tilde J(x + \delta d_x, y + \delta d_y)
   & = \tilde J(x,y) + \partial_x \tilde J(x,y) \delta d_x + \partial_y \tilde J(x,y) \delta d_y
\\ & = \tilde J(x,y) + \nabla \tilde J(x,y)^T \delta d \ .
\end{split}
\end{equation}
Inserting this into the functional gives
\begin{equation}
 E(\delta d) \approx \sum_{(x,y)\in W} (I(x,y) - \tilde J(x,y) - \nabla \tilde J(x,y)^T \delta d)^2 \ .
\end{equation}
Differentiating with respect to $\delta d$ and setting the result to zero gives
\begin{equation}
 0 =
 \sum_{(x,y)\in W} (I(x,y) - \tilde J(x,y)
          - \nabla \tilde J(x,y)^T \delta d)
        \nabla \tilde J(x,y) \ ,
\end{equation}
which is a 2-vector equality.  Factoring out $\delta d$,
\begin{equation}
 \left( \sum_{(x,y)\in W} \nabla \tilde J(x,y) \nabla \tilde J(x,y)^T \right) \delta d
 = \sum_{(x,y)\in W} (I(x,y) - \tilde J(x,y)) \nabla \tilde J(x,y) \ ,
\end{equation}
which is a set of 2 linear equations in $\delta d$.

We now define the gradient matrix as
\begin{equation}
 Z = \sum_{(x,y)\in W} \nabla \tilde J(x,y) \nabla \tilde J(x,y)^T
= \sum_{(x,y)\in W}
  \begin{pmatrix}
   (\partial_x \tilde J)^2
&  \partial_x \tilde J \partial_y\tilde J
\\ \partial_x \tilde J \partial_y\tilde J
&  (\partial_y \tilde J)^2
  \end{pmatrix}
\end{equation}
and the error vector as
\begin{equation}
 \mathbf e = \sum_{(x,y)\in W} (I(x,y) - \tilde J(x,y)) \nabla \tilde J(x,y) \ .
\end{equation}
The system to solve at each iteration is
\begin{equation}
 Z \delta d = \mathbf e \ .
\end{equation}


\section{Implementation Details}
\subsection{Smoothing the Images}
Before tracking, the images should be slightly smoothed.  There are two reasons for this:
\begin{enumerate}
 \item The linearization of $J$ only makes sense if $J$ is smooth.
 \item The tracking equations require computing the gradient of $J$.  Given a sampling of $J$ at a certain scale, there is no way to obtain its gradient accurately at that same scale.  We can, however, compute the gradient at a smoother scale by convolving $J$ with the derivatives of a Gaussian kernel $G$.  That is,
 \begin{equation}
  \bar J = G \star J  \quad \text{and} \quad \nabla\bar J = \nabla G \star J \ .
 \end{equation}
When doing this, we are computing the derivatives of the smooth version $\bar J$, not the derivatives of $J$.  Therefore, we have to use the smooth version $\bar J$ in all the equations instead of $J$, if we want the derivatives to be consistent with the image.
\end{enumerate}
Since we smooth $J$, it seems reasonable to also smooth $I$ so that they are at the same scale.

To sum up, given the two images $I$ and $J$, tracking has to be done over their smoothed versions
\begin{equation}
  \bar I = G \star I \ , \quad 
  \bar J = G \star J  \quad \text{and} \quad
  \nabla\bar J = \nabla G \star J \ .
\end{equation}


\end{document}
